In [106]:
    
pd.read_csv?
    
In [103]:
    
df = pd.read_csv("../../../linux/iso_time.log", header=None, names=['timestamp'])
df.head()
    
    Out[103]:
  
    
       
      timestamp 
     
  
  
    
      0 
      2017-08-15 12:49:43 -0700 
     
    
      1 
      2017-08-14 13:09:59 -0700 
     
    
      2 
      2017-08-14 11:35:56 -0700 
     
    
      3 
      2017-08-13 16:01:32 -0700 
     
    
      4 
      2017-08-13 15:34:28 -0700 
     
  
In [139]:
    
t = pd.to_datetime(df[:5]['timestamp'], utc=True)
tz = t[0]
tz.tz
    
In [148]:
    
d = pd.DataFrame(['2017-08-15 12:49:43 -0700'], columns=["t"])
pd.to_datetime(d['t'])
    
    Out[148]:
0   2017-08-15 19:49:43
Name: t, dtype: datetime64[ns]
In [13]:
    
import pandas as pd
git_log = pd.read_csv(
    "../../../linux/git_log_basic.bz2",
    sep="#",
    header=None,
    names=['timestamp', 'author', 'email']
)
git_log.head()
    
    Out[13]:
  
    
       
      timestamp 
      author 
      email 
     
  
  
    
      0 
      1502826583 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      1 
      1502741399 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2 
      1502735756 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      3 
      1502665292 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      4 
      1502663668 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
  
In [9]:
    
import pandas as pd
parse_timestamp_in_s = lambda x : pd.to_datetime(x, unit="s")
git_log = pd.read_csv(
    "../../../linux/git_log_basic.bz2",
    encoding="utf-8",
    sep="#",
    header=None,
    names=['timestamp', 'author', 'email'],
    parse_dates=[0],
    date_parser= parse_timestamp_in_s,
    index_col=0,
    compression="bz2"
)
git_log.head()
    
    Out[9]:
  
    
       
      author 
      email 
     
    
      timestamp 
       
       
     
  
  
    
      2017-08-15 19:49:43 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-14 20:09:59 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-14 18:35:56 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-13 23:01:32 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-13 22:34:28 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
  
In [10]:
    
git_log['author'].sort_values()
    
    Out[10]:
timestamp
2017-01-26 21:29:48    =?UTF-8?q?Simon=20Sandstr=C3=B6m?=
2005-06-20 12:22:55                                     ?
2010-08-29 20:51:52                          A E Lawrence
2014-08-07 08:40:39                     A Raghavendra Rao
2014-06-11 04:58:58                     A Raghavendra Rao
2017-03-26 18:28:08                                 A Sun
2017-04-13 08:06:47                                 A Sun
2017-03-26 19:04:51                                 A Sun
2017-03-26 18:33:07                                 A Sun
2006-03-14 23:44:23                   A. Maitland Bottoms
2015-04-16 08:52:09                        A.Sunil Kamath
2014-11-24 08:07:44                        A.Sunil Kamath
2016-11-20 17:50:26                        A.Sunil Kamath
2015-04-16 08:52:12                        A.Sunil Kamath
2005-11-15 07:55:18                           A.YOSHIYAMA
2009-02-12 23:14:18                        ADDI-DATA GmbH
2014-07-04 07:28:31                       AKASHI Takahiro
2014-01-17 08:05:03                       AKASHI Takahiro
2014-03-15 05:48:00                       AKASHI Takahiro
2016-08-22 06:55:24                       AKASHI Takahiro
2014-04-30 09:54:36                       AKASHI Takahiro
2014-04-30 09:54:35                       AKASHI Takahiro
2016-11-14 06:15:05                       AKASHI Takahiro
2017-04-03 02:24:40                       AKASHI Takahiro
2017-04-03 02:24:39                       AKASHI Takahiro
2017-04-03 02:24:38                       AKASHI Takahiro
2014-05-20 11:31:04                       AKASHI Takahiro
2015-10-30 05:25:39                       AKASHI Takahiro
2017-04-03 02:24:36                       AKASHI Takahiro
2013-09-24 09:00:50                       AKASHI Takahiro
                                      ...                
2015-09-25 09:54:58                        Łukasz Daniluk
2006-06-22 08:37:19                       Łukasz Stelmach
2015-03-27 20:39:59                       Łukasz Stelmach
2015-07-29 19:31:23                       Łukasz Stelmach
2014-12-16 15:53:08                       Łukasz Stelmach
2011-02-24 09:03:31                    Łukasz Wojniłowicz
2010-01-24 13:12:37                    Łukasz Wojniłowicz
2009-11-20 11:14:35                    Łukasz Wojniłowicz
2011-02-07 12:13:27                    Łukasz Wojniłowicz
2013-11-01 13:26:38                    Алексей Крамаренко
2016-04-05 16:14:10               Буди Романто, AreMa Inc
2015-07-21 10:31:52                     Николай Кудрявцев
2013-07-12 06:33:33                               “Cosmin
2014-10-06 06:21:27                                    ほち
2013-12-18 07:37:17                                    张君
2013-06-30 09:09:28                                   张忠山
2016-09-26 08:29:31                                   彭东林
2015-06-05 05:00:24                                   洪一竹
2013-12-20 02:04:10                                   蔡正龙
2013-12-20 02:04:10                                   蔡正龙
2012-05-28 13:31:29                                   说不得
2011-06-09 01:42:48                                   马建朋
2009-01-29 16:57:17                                   김규원
2017-02-01 10:25:46                                   남영민
2013-04-18 14:01:05                                   송은봉
2013-04-17 21:40:17                                   송은봉
2013-11-27 00:42:41                                   송은봉
2012-08-29 13:58:12                                   이건호
2016-12-08 12:01:13                                   추지호
2005-07-02 06:27:00                                   NaN
Name: author, Length: 692885, dtype: object
Note: We are using the UNIX timestamp here because it's superfast to convert it to a real datatime64 data type.
_Note: 'today'is suboptimal
In [234]:
    
first_commit = git_log.index[-1]
first_commit
    
    Out[234]:
Timestamp('2005-04-16 22:20:36')
In [11]:
    
today = pd.to_datetime('today')
type(today)
    
    Out[11]:
pandas._libs.tslib.Timestamp
In [237]:
    
git_log[(git_log < today) & (git_log >= first_commit)]
    
    Out[237]:
  
    
       
      author 
      email 
     
    
      timestamp 
       
       
     
  
  
    
      2017-08-29 18:16:21 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-29 18:13:52 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-29 16:11:06 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-29 15:36:58 
      Tejun Heo 
      tj@kernel.org 
     
    
      2017-08-29 12:42:06 
      Christoph Hellwig 
      hch@lst.de 
     
    
      2017-08-28 23:45:40 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-28 21:51:27 
      Tejun Heo 
      tj@kernel.org 
     
    
      2017-08-28 22:03:58 
      Alexey Brodkin 
      abrodkin@synopsys.com 
     
    
      2017-08-23 20:37:00 
      Helge Deller 
      deller@gmx.de 
     
    
      2017-08-28 18:15:46 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-28 06:28:08 
      Christoph Hellwig 
      hch@lst.de 
     
    
      2017-08-28 00:20:40 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-28 00:10:34 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-28 00:08:37 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-28 00:03:33 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-28 00:01:54 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-27 23:25:09 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-27 20:55:12 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-27 19:12:25 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-26 19:48:29 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-26 19:46:14 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-26 16:06:28 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-26 16:02:18 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-26 15:59:50 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-26 01:02:27 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-26 00:46:23 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-26 00:40:03 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-26 00:32:35 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-26 00:27:26 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-26 00:22:33 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      ... 
      ... 
      ... 
     
    
      2005-04-16 22:24:17 
      Paul Mackerras 
      paulus@samba.org 
     
    
      2005-04-16 22:24:17 
      Paul Mackerras 
      paulus@samba.org 
     
    
      2005-04-16 22:24:16 
      Paul Mackerras 
      paulus@samba.org 
     
    
      2005-04-16 22:24:15 
      Eugene Surovegin 
      ebs@ebshome.net 
     
    
      2005-04-16 22:24:14 
      Paul Mackerras 
      paulus@samba.org 
     
    
      2005-04-16 22:24:13 
      James Morris 
      jmorris@redhat.com 
     
    
      2005-04-16 22:24:11 
      Jean Tourrilhes 
      jt@hpl.hp.com 
     
    
      2005-04-16 22:24:10 
      Herbert Xu 
      herbert@gondor.apana.org.au 
     
    
      2005-04-16 22:24:09 
      Arnaldo Carvalho de Melo 
      acme@conectiva.com.br 
     
    
      2005-04-16 22:24:09 
      David S. Miller 
      davem@davemloft.net 
     
    
      2005-04-16 22:24:08 
      Martin Hicks 
      mort@sgi.com 
     
    
      2005-04-16 22:24:07 
      akpm@osdl.org 
      akpm@osdl.org 
     
    
      2005-04-16 22:24:06 
      akpm@osdl.org 
      akpm@osdl.org 
     
    
      2005-04-16 22:24:05 
      Andrea Arcangeli 
      andrea@suse.de 
     
    
      2005-04-16 22:24:05 
      Jeff Moyer 
      jmoyer@redhat.com 
     
    
      2005-04-16 22:24:04 
      Dave Airlie 
      airlied@linux.ie 
     
    
      2005-04-16 22:24:03 
      James Morris 
      jmorris@redhat.com 
     
    
      2005-04-16 22:24:02 
      akpm@osdl.org 
      akpm@osdl.org 
     
    
      2005-04-16 22:24:01 
      David S. Miller 
      davem@davemloft.net 
     
    
      2005-04-16 22:24:01 
      Stas Sergeev 
      stsp@aknet.ru 
     
    
      2005-04-16 22:24:00 
      akpm@osdl.org 
      akpm@osdl.org 
     
    
      2005-04-16 22:23:59 
      James Bottomley 
      James.Bottomley@SteelEye.com 
     
    
      2005-04-16 22:23:58 
      Artem B. Bityuckiy 
      dedekind@infradead.org 
     
    
      2005-04-16 22:23:57 
      akpm@osdl.org 
      akpm@osdl.org 
     
    
      2005-04-16 22:23:57 
      akpm@osdl.org 
      akpm@osdl.org 
     
    
      2005-04-16 22:23:56 
      akpm@osdl.org 
      akpm@osdl.org 
     
    
      2005-04-16 22:23:55 
      akpm@osdl.org 
      akpm@osdl.org 
     
    
      2005-04-16 22:23:54 
      Neil Brown 
      neilb@cse.unsw.edu.au 
     
    
      2005-04-16 22:23:53 
      Christoph Lameter 
      clameter@engr.sgi.com 
     
    
      2005-04-16 22:20:36 
      Linus Torvalds 
      torvalds@ppc970.osdl.org 
     
  
693323 rows × 2 columns
In [222]:
    
corrected_dates = git_log.iloc[
    -2
]
# & (git_log.index <= 'today')
corrected_dates
    
    Out[222]:
author        Christoph Lameter
email     clameter@engr.sgi.com
Name: 2005-04-16 22:23:53, dtype: object
In [226]:
    
corrected_dates = git_log.iloc[:, -1]
# & (git_log.index <= 'today')
corrected_dates
    
    Out[226]:
timestamp
2017-08-29 18:16:21    torvalds@linux-foundation.org
2017-08-29 18:13:52    torvalds@linux-foundation.org
2017-08-29 16:11:06    torvalds@linux-foundation.org
2017-08-29 15:36:58                    tj@kernel.org
2017-08-29 12:42:06                       hch@lst.de
2017-08-28 23:45:40    torvalds@linux-foundation.org
2017-08-28 21:51:27                    tj@kernel.org
2017-08-28 22:03:58            abrodkin@synopsys.com
2017-08-23 20:37:00                    deller@gmx.de
2017-08-28 18:15:46    torvalds@linux-foundation.org
2017-08-28 06:28:08                       hch@lst.de
2017-08-28 00:20:40    torvalds@linux-foundation.org
2017-08-28 00:10:34    torvalds@linux-foundation.org
2017-08-28 00:08:37    torvalds@linux-foundation.org
2017-08-28 00:03:33    torvalds@linux-foundation.org
2017-08-28 00:01:54    torvalds@linux-foundation.org
2017-08-27 23:25:09    torvalds@linux-foundation.org
2017-08-27 20:55:12    torvalds@linux-foundation.org
2017-08-27 19:12:25    torvalds@linux-foundation.org
2017-08-26 19:48:29    torvalds@linux-foundation.org
2017-08-26 19:46:14    torvalds@linux-foundation.org
2017-08-26 16:06:28    torvalds@linux-foundation.org
2017-08-26 16:02:18    torvalds@linux-foundation.org
2017-08-26 15:59:50    torvalds@linux-foundation.org
2017-08-26 01:02:27    torvalds@linux-foundation.org
2017-08-26 00:46:23    torvalds@linux-foundation.org
2017-08-26 00:40:03    torvalds@linux-foundation.org
2017-08-26 00:32:35    torvalds@linux-foundation.org
2017-08-26 00:27:26    torvalds@linux-foundation.org
2017-08-26 00:22:33    torvalds@linux-foundation.org
                                   ...              
2005-04-16 22:24:17                 paulus@samba.org
2005-04-16 22:24:17                 paulus@samba.org
2005-04-16 22:24:16                 paulus@samba.org
2005-04-16 22:24:15                  ebs@ebshome.net
2005-04-16 22:24:14                 paulus@samba.org
2005-04-16 22:24:13               jmorris@redhat.com
2005-04-16 22:24:11                    jt@hpl.hp.com
2005-04-16 22:24:10      herbert@gondor.apana.org.au
2005-04-16 22:24:09            acme@conectiva.com.br
2005-04-16 22:24:09              davem@davemloft.net
2005-04-16 22:24:08                     mort@sgi.com
2005-04-16 22:24:07                    akpm@osdl.org
2005-04-16 22:24:06                    akpm@osdl.org
2005-04-16 22:24:05                   andrea@suse.de
2005-04-16 22:24:05                jmoyer@redhat.com
2005-04-16 22:24:04                 airlied@linux.ie
2005-04-16 22:24:03               jmorris@redhat.com
2005-04-16 22:24:02                    akpm@osdl.org
2005-04-16 22:24:01              davem@davemloft.net
2005-04-16 22:24:01                    stsp@aknet.ru
2005-04-16 22:24:00                    akpm@osdl.org
2005-04-16 22:23:59     James.Bottomley@SteelEye.com
2005-04-16 22:23:58           dedekind@infradead.org
2005-04-16 22:23:57                    akpm@osdl.org
2005-04-16 22:23:57                    akpm@osdl.org
2005-04-16 22:23:56                    akpm@osdl.org
2005-04-16 22:23:55                    akpm@osdl.org
2005-04-16 22:23:54            neilb@cse.unsw.edu.au
2005-04-16 22:23:53            clameter@engr.sgi.com
2005-04-16 22:20:36         torvalds@ppc970.osdl.org
Name: email, Length: 693323, dtype: object
In [209]:
    
%matplotlib inline
corrected_dates = git_log.loc[
    str(git_log.index[-1]) : '2017-1-1'
]
# & (git_log.index <= 'today')
corrected_dates
    
    Out[209]:
  
    
       
      author 
      email 
     
    
      timestamp 
       
       
     
  
  
    
      2016-08-05 09:55:18 
      Philipp Zabel 
      p.zabel@pengutronix.de 
     
    
      2014-03-19 17:41:37 
      Arnd Bergmann 
      arnd@arndb.de 
     
    
      2016-06-10 08:51:04 
      Arnd Bergmann 
      arnd@arndb.de 
     
    
      2014-03-25 23:17:09 
      Arnd Bergmann 
      arnd@arndb.de 
     
    
      2016-01-15 12:04:01 
      Arnd Bergmann 
      arnd@arndb.de 
     
    
      2016-01-14 15:57:33 
      Arnd Bergmann 
      arnd@arndb.de 
     
    
      2015-12-29 16:09:18 
      Arnd Bergmann 
      arnd@arndb.de 
     
    
      2015-12-29 13:40:00 
      Arnd Bergmann 
      arnd@arndb.de 
     
    
      2016-01-15 12:32:43 
      Arnd Bergmann 
      arnd@arndb.de 
     
    
      2016-01-15 13:41:03 
      Arnd Bergmann 
      arnd@arndb.de 
     
    
      2016-01-16 23:37:36 
      Arnd Bergmann 
      arnd@arndb.de 
     
    
      2014-01-28 20:17:41 
      Arnd Bergmann 
      arnd@arndb.de 
     
    
      2015-11-22 14:24:28 
      Alexander Sverdlin 
      alexander.sverdlin@gmail.com 
     
    
      2014-01-28 20:17:41 
      Arnd Bergmann 
      arnd@arndb.de 
     
    
      2014-01-28 20:17:41 
      Arnd Bergmann 
      arnd@arndb.de 
     
    
      2014-03-15 10:21:06 
      Arnd Bergmann 
      arnd@arndb.de 
     
    
      2016-12-01 19:55:45 
      Souptick Joarder 
      jrdr.linux@gmail.com 
     
    
      2016-11-29 06:49:19 
      Christophe JAILLET 
      christophe.jaillet@wanadoo.fr 
     
    
      2016-11-08 12:00:31 
      Ravikant B Sharma 
      ravikant.s2@samsung.com 
     
    
      2016-11-16 18:19:25 
      Benson Leung 
      bleung@chromium.org 
     
    
      2016-12-20 09:50:09 
      Serge Semin 
      fancer.lancer@gmail.com 
     
    
      2016-12-13 23:49:20 
      Serge Semin 
      fancer.lancer@gmail.com 
     
    
      2016-12-13 23:49:19 
      Serge Semin 
      fancer.lancer@gmail.com 
     
    
      2016-12-20 09:48:20 
      Serge Semin 
      fancer.lancer@gmail.com 
     
    
      2016-12-13 23:49:15 
      Serge Semin 
      fancer.lancer@gmail.com 
     
    
      2016-12-13 23:49:14 
      Serge Semin 
      fancer.lancer@gmail.com 
     
    
      2016-12-13 23:49:13 
      Serge Semin 
      fancer.lancer@gmail.com 
     
    
      2016-12-28 00:12:28 
      Allen Hubbe 
      Allen.Hubbe@dell.com 
     
    
      2016-12-28 00:12:27 
      Allen Hubbe 
      Allen.Hubbe@dell.com 
     
    
      2016-07-27 14:32:20 
      Jan Kiszka 
      jan.kiszka@siemens.com 
     
    
      ... 
      ... 
      ... 
     
    
      2005-04-16 22:24:17 
      Paul Mackerras 
      paulus@samba.org 
     
    
      2005-04-16 22:24:17 
      Paul Mackerras 
      paulus@samba.org 
     
    
      2005-04-16 22:24:16 
      Paul Mackerras 
      paulus@samba.org 
     
    
      2005-04-16 22:24:15 
      Eugene Surovegin 
      ebs@ebshome.net 
     
    
      2005-04-16 22:24:14 
      Paul Mackerras 
      paulus@samba.org 
     
    
      2005-04-16 22:24:13 
      James Morris 
      jmorris@redhat.com 
     
    
      2005-04-16 22:24:11 
      Jean Tourrilhes 
      jt@hpl.hp.com 
     
    
      2005-04-16 22:24:10 
      Herbert Xu 
      herbert@gondor.apana.org.au 
     
    
      2005-04-16 22:24:09 
      Arnaldo Carvalho de Melo 
      acme@conectiva.com.br 
     
    
      2005-04-16 22:24:09 
      David S. Miller 
      davem@davemloft.net 
     
    
      2005-04-16 22:24:08 
      Martin Hicks 
      mort@sgi.com 
     
    
      2005-04-16 22:24:07 
      akpm@osdl.org 
      akpm@osdl.org 
     
    
      2005-04-16 22:24:06 
      akpm@osdl.org 
      akpm@osdl.org 
     
    
      2005-04-16 22:24:05 
      Andrea Arcangeli 
      andrea@suse.de 
     
    
      2005-04-16 22:24:05 
      Jeff Moyer 
      jmoyer@redhat.com 
     
    
      2005-04-16 22:24:04 
      Dave Airlie 
      airlied@linux.ie 
     
    
      2005-04-16 22:24:03 
      James Morris 
      jmorris@redhat.com 
     
    
      2005-04-16 22:24:02 
      akpm@osdl.org 
      akpm@osdl.org 
     
    
      2005-04-16 22:24:01 
      David S. Miller 
      davem@davemloft.net 
     
    
      2005-04-16 22:24:01 
      Stas Sergeev 
      stsp@aknet.ru 
     
    
      2005-04-16 22:24:00 
      akpm@osdl.org 
      akpm@osdl.org 
     
    
      2005-04-16 22:23:59 
      James Bottomley 
      James.Bottomley@SteelEye.com 
     
    
      2005-04-16 22:23:58 
      Artem B. Bityuckiy 
      dedekind@infradead.org 
     
    
      2005-04-16 22:23:57 
      akpm@osdl.org 
      akpm@osdl.org 
     
    
      2005-04-16 22:23:57 
      akpm@osdl.org 
      akpm@osdl.org 
     
    
      2005-04-16 22:23:56 
      akpm@osdl.org 
      akpm@osdl.org 
     
    
      2005-04-16 22:23:55 
      akpm@osdl.org 
      akpm@osdl.org 
     
    
      2005-04-16 22:23:54 
      Neil Brown 
      neilb@cse.unsw.edu.au 
     
    
      2005-04-16 22:23:53 
      Christoph Lameter 
      clameter@engr.sgi.com 
     
    
      2005-04-16 22:20:36 
      Linus Torvalds 
      torvalds@ppc970.osdl.org 
     
  
651369 rows × 2 columns
In [ ]:
    
    
In [53]:
    
grouped_by_time = corrected_dates.groupby(pd.TimeGrouper(freq="M")).count()
grouped_by_time.plot(figsize=(15,5))
grouped_by_time.head()
    
    Out[53]:
  
    
       
      timestamp 
      author 
      email 
     
    
      timestamp 
       
       
       
     
  
  
    
      2005-04-30 
      792 
      792 
      792 
     
    
      2005-05-31 
      1319 
      1319 
      1319 
     
    
      2005-06-30 
      2123 
      2123 
      2122 
     
    
      2005-07-31 
      1538 
      1537 
      1537 
     
    
      2005-08-31 
      1521 
      1521 
      1520 
     
  
    
 
Bonus: get rid of the incomplete months at the beginning or end (but does it make sens at all to remove them?)
One author didn't provide his/her name, so it's null. What to do about it? Remove it or set it to unknown?
In [54]:
    
git_log['author'] = git_log['author'].fillna("UNKNOWN")
git_log.head()
    
    Out[54]:
  
    
       
      timestamp 
      author 
      email 
     
    
      timestamp 
       
       
       
     
  
  
    
      2017-08-15 19:49:43 
      2017-08-15 19:49:43 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-14 20:09:59 
      2017-08-14 20:09:59 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-14 18:35:56 
      2017-08-14 18:35:56 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-13 23:01:32 
      2017-08-13 23:01:32 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
    
      2017-08-13 22:34:28 
      2017-08-13 22:34:28 
      Linus Torvalds 
      torvalds@linux-foundation.org 
     
  
In [56]:
    
git_log['author'].value_counts().head()
    
    Out[56]:
Linus Torvalds     23361
David S. Miller     8994
Mark Brown          6796
Takashi Iwai        6206
Al Viro             5993
Name: author, dtype: int64
In [69]:
    
git_log[git_log['author'].str.contains('Viro')]
    
    Out[69]:
  
    
       
      timestamp 
      author 
      email 
     
    
      timestamp 
       
       
       
     
  
  
    
      2017-07-12 03:59:45 
      2017-07-12 03:59:45 
      Al Viro 
      viro@ZenIV.linux.org.uk 
     
    
      2017-07-10 11:40:49 
      2017-07-10 11:40:49 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-04-20 20:43:12 
      2017-04-20 20:43:12 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-07-08 15:26:39 
      2017-07-08 15:26:39 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-07-07 18:51:19 
      2017-07-07 18:51:19 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-06-30 02:25:14 
      2017-06-30 02:25:14 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-07-06 13:15:47 
      2017-07-06 13:15:47 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-07-06 06:00:00 
      2017-07-06 06:00:00 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-05-25 20:28:49 
      2017-05-25 20:28:49 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-07-04 17:31:18 
      2017-07-04 17:31:18 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-06-04 01:33:26 
      2017-06-04 01:33:26 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-06-03 20:19:18 
      2017-06-03 20:19:18 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-05-26 03:25:07 
      2017-05-26 03:25:07 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-05-25 20:27:27 
      2017-05-25 20:27:27 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-05-25 20:24:20 
      2017-05-25 20:24:20 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-05-25 19:47:44 
      2017-05-25 19:47:44 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-05-25 19:45:04 
      2017-05-25 19:45:04 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-05-25 17:33:11 
      2017-05-25 17:33:11 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-05-25 17:28:51 
      2017-05-25 17:28:51 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-05-25 17:24:59 
      2017-05-25 17:24:59 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-05-25 17:17:17 
      2017-05-25 17:17:17 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-06-27 22:34:53 
      2017-06-27 22:34:53 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-06-27 22:24:21 
      2017-06-27 22:24:21 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-06-27 23:32:04 
      2017-06-27 23:32:04 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-06-28 01:32:36 
      2017-06-28 01:32:36 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-05-03 00:06:33 
      2017-05-03 00:06:33 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-05-02 23:52:17 
      2017-05-02 23:52:17 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-05-13 22:43:00 
      2017-05-13 22:43:00 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-05-13 22:39:01 
      2017-05-13 22:39:01 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      2017-05-13 22:31:26 
      2017-05-13 22:31:26 
      Al Viro 
      viro@zeniv.linux.org.uk 
     
    
      ... 
      ... 
      ... 
      ... 
     
    
      2005-04-26 14:43:41 
      2005-04-26 14:43:41 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-26 14:43:41 
      2005-04-26 14:43:41 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-26 14:43:41 
      2005-04-26 14:43:41 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-26 14:43:40 
      2005-04-26 14:43:40 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-26 04:40:39 
      2005-04-26 04:40:39 
      Al Viro 
      viro@parcelfarce.linux.theplanet.co.uk 
     
    
      2005-04-26 01:32:13 
      2005-04-26 01:32:13 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-26 01:32:12 
      2005-04-26 01:32:12 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-26 01:32:12 
      2005-04-26 01:32:12 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-26 01:32:12 
      2005-04-26 01:32:12 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-25 14:55:59 
      2005-04-25 14:55:59 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-25 14:55:58 
      2005-04-25 14:55:58 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-25 14:55:58 
      2005-04-25 14:55:58 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-25 14:55:58 
      2005-04-25 14:55:58 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-25 14:55:58 
      2005-04-25 14:55:58 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-25 14:55:57 
      2005-04-25 14:55:57 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-25 14:55:57 
      2005-04-25 14:55:57 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-24 21:58:08 
      2005-04-24 21:58:08 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-24 21:58:08 
      2005-04-24 21:58:08 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-24 21:58:08 
      2005-04-24 21:58:08 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-24 19:28:36 
      2005-04-24 19:28:36 
      Al Viro 
      viro@parcelfarce.linux.theplanet.co.uk 
     
    
      2005-04-24 19:28:36 
      2005-04-24 19:28:36 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-24 19:28:35 
      2005-04-24 19:28:35 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-24 19:28:35 
      2005-04-24 19:28:35 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-24 19:28:35 
      2005-04-24 19:28:35 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-24 19:28:35 
      2005-04-24 19:28:35 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-24 19:28:35 
      2005-04-24 19:28:35 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-24 19:28:34 
      2005-04-24 19:28:34 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-24 19:28:34 
      2005-04-24 19:28:34 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-24 19:28:34 
      2005-04-24 19:28:34 
      Al Viro 
      viro@www.linux.org.uk 
     
    
      2005-04-21 00:12:41 
      2005-04-21 00:12:41 
      Al Viro 
      viro@parcelfarce.linux.theplanet.co.uk 
     
  
5996 rows × 3 columns
Examine the TOP 5 contributors of the repository. Which person(s) used multiple author names? (0) Linus Torvalds (1) David S. Millter (1) Mark Brown (1) (2) Al Viro
Hint: One author name is a little bit tricky. We'll asume, that both author names are the same person.
(Hint: Count the commits per author
Content source: feststelltaste/software-analytics
Similar notebooks: